*********************************************************************************
*This file cleans data of Chinese imports from China of the UN Comtrade database*
*********************************************************************************

*Import file, generate variable with state name using file name, rename variables, save files and append them

*Europe
cd "$raw_data_imports/us europe"
set more off

local allfiles : dir "$raw_data_imports/us europe" files "comt*" 
tempfile temp_europe
clear
save temp_europe, emptyok replace
foreach file of local allfiles {
    import delimit using `file', varnames(1) clear
	quiet tostring *, replace force
    quiet append using temp_europe
    save temp_europe, replace
}

rename partner partner_class

drop period-tradeflowcode
drop reportercode reporteriso partnercode 
drop partneriso-modeoftransport
drop qtyunitcode-grossweightkg
drop ciftradevalueus-flag
drop tradeflow
rename tradevalueus imports

destring year imports, replace

gen country = "us"

collapse (firstnm) country, by(partner_class year commoditycode imports)

save "$clean_data_imports/europe_imports_raw", replace
rm "$raw_data_imports/us europe/temp_europe.dta"


*GENERATE ONE STATA DATASET FOR EACH COUNTRY (US, CH, GER, ...)
set more off
use "$clean_data_imports/europe_imports_raw", clear
levelsof country, local(levels) 
foreach s of local levels {
	use "$clean_data_imports/europe_imports_raw", clear
	keep if country == "`s'"
	sort year partner commoditycode
	save "$clean_data_imports/comtrade_europe_`s'", replace
}

*******************************************************************************************
*							HS standardization (to HS2002)
*******************************************************************************************
*Standardize everything to HS02 such that afterwards there is a perfect match with Dorn's crosswalk to SIC
foreach country in us {

	use "$clean_data_imports/comtrade_europe_`country'", clear

	*Replace this code which is not recognized otherwise in Dorn's classification
	replace commoditycode = "640419" if commoditycode == "640320"

	replace commoditycode = "0" + commoditycode if length(commoditycode) == 5

	gen hs17  = commoditycode
	gen hs12  = commoditycode
	gen hs07  = commoditycode

	save "$clean_data_imports/comtrade_europe_`country'", replace

	*HS17 to HS12
	import excel using "$raw_data_imports/china shock/hs17 to hs12.xlsx", firstrow clear
	replace hs17 = "0" + hs17 if length(hs17) == 5
	replace hs12 = "0" + hs12 if length(hs12) == 5

	merge 1:m hs17 using "$clean_data_imports/comtrade_europe_`country'"
	replace commoditycode = hs12 if _merge == 3 
	drop if _merge==1
	drop _merge

	collapse (firstnm) country hs12 hs07 (sum) imports, by(partner_class year commoditycode)

	save "$clean_data_imports/comtrade_europe_`country'", replace

	*HS12 to HS07
	import excel using "$raw_data_imports/china shock/hs12 to hs07.xlsx", firstrow clear
	replace hs12 = "0" + hs12 if length(hs12) == 5
	replace hs07 = "0" + hs07 if length(hs07) == 5

	merge 1:m hs12 using "$clean_data_imports/comtrade_europe_`country'"
	replace commoditycode = hs07 if _merge == 3 
	drop if _merge==1
	drop _merge

	collapse (firstnm) country hs07 (sum) imports, by(partner_class year commoditycode)

	save "$clean_data_imports/comtrade_europe_`country'", replace

	*HS07 to HS02
	import excel using "$raw_data_imports/china shock/hs07 to hs02.xlsx", firstrow clear
	replace hs07 = "0" + hs07 if length(hs07) == 5
	replace hs02 = "0" + hs02 if length(hs02) == 5

	merge 1:m hs07 using "$clean_data_imports/comtrade_europe_`country'"
	replace commoditycode = hs02 if _merge == 3 
	drop if _merge==1
	drop _merge

	collapse (firstnm) country (sum) imports, by(partner_class year commoditycode)

	save "$clean_data_imports/comtrade_europe_`country'", replace
}


*******************************************************************************************
*								Imports conversion (HS to SIC)
*******************************************************************************************

/*Need to create dataset with all combinations of country, year, hs (and with more than one hs if the same 
hs matches to different sic) to match crosswalk using m:1 with imports*/
*import delimit "01_Temp/04_conversion_hs_to_sic/tables/combi.csv", varnames(1) clear
import delimit "$raw_data_imports/china shock/id_crosswalk.csv", varnames(1) clear
cap rename ïid id
replace partner_class = "" 
replace partner_class = "Denmark" if _n==1
replace partner_class = "France" if _n==2
replace partner_class = "Germany" if _n==3
replace partner_class = "Italy" if _n==4
replace partner_class = "Sweden" if _n==5
replace partner_class = "United Kingdom" if _n==6
replace partner_class = "Finland" if _n==7
replace partner_class = "Spain" if _n==8
replace year =. 
replace year = 1993 if _n==1
replace year = 2000 if _n==2
replace year = 2007 if _n==3
replace year = 2014 if _n==4

fillin partner_class year id 
drop _fillin hs6 sic87dd share
drop if partner_class=="" | year==.
save "$clean_data_imports/all_combis_europe.dta", replace

import delimit "$raw_data_imports/china shock/id_crosswalk.csv", varnames(1) clear
*rename ïid id
drop partner_class year
merge 1:m id using "$clean_data_imports/all_combis_europe.dta" 
drop _merge 

sort id partner_class year
save "$clean_data_imports/hs_sic_partner_europe_year", replace

tostring hs6 sic87dd, replace
rename hs6 hs
replace hs = "0" + hs if length(hs) == 5
replace sic87dd = "0" + sic87dd if length(sic87dd) == 3
drop if sic87dd == "." //values that are missing in Dorn's crosswalk
save "$clean_data_imports/hs_sic_partner_europe_year", replace

*Imports of countries expressed in sic
*(if no merge because of using, no problem, since from all combinations, there may not
*be imports for this combination)
*those who do not merge in the master, do not merge because the code is not in 
*Dorn's crosswalk list
set more off
foreach country in us {
		use "$clean_data_imports/comtrade_europe_`country'", clear
		rename commoditycode hs
		replace hs = "0" + hs if length(hs) == 5
		replace hs = "999999" if hs == "9999AA"
		drop if hs == "0TOTAL"
		sort year hs
	
		*Converge HS to SIC
		sort hs partner_class year
		drop if hs == "999999"
		*Drop duplicates from egen function
		bysort hs year partner_class: gen d = cond(_N==1,0,_n)
		drop if d>1
		drop d
		
		merge 1:m hs year partner_class using "$clean_data_imports/hs_sic_partner_europe_year"
		replace sic87dd = "2911" if hs == "271013"|hs == "271014"|hs == "271016"|hs == "271021"|hs == "271022"|hs == "271025"|hs == "271026"|hs ==  "271027"|hs == "271029"|hs == "271113"|hs == "271115"|hs == "271116"
		*Example: still 1905 codes for which there are no imports from China
		*drop if imports==.
		replace imports = 0 if mi(imports)
		drop _merge

		*Total imports by sic and year (when same HS but different SIC, imports flow into SIC using shares by Dorn)
		bysort sic87dd year: egen import_industry = sum(import*share) if !mi(import)

		collapse (firstnm) country partner_class import_industry, by(sic87dd year)
		rename sic87dd sic
		destring sic, replace
		
		*Merge a few categories by hand
		replace sic = 2066 if sic == 2067
		replace sic = 2252 if sic == 2253
		replace sic = 2341 if sic == 2342
		replace sic = 3291 if sic == 3292
		replace sic = 3339 if sic == 3341
		
		collapse (firstnm) country partner_class (sum) import_industry, by(sic year)
		
		save "$clean_data_imports/c_comtrade_europe_`country'", replace
}


*Fill in missings in reporter and country
set more off
foreach country in us {
	use "$clean_data_imports/c_comtrade_europe_`country'", clear
	gsort -country
	replace country = country[_n-1] if mi(country)
	sort year sic
	save "$clean_data_imports/c_comtrade_europe_`country'", replace
}


*******************************
*Inflate imports to 2017 prices
*******************************

set more off
foreach country in us {
	import delimit "$raw_data_imports/china shock/PCE.csv", clear
	drop if year < 1993
	merge 1:m year using "$clean_data_imports/c_comtrade_europe_`country'.dta"
	gen import_industry1 = import_industry/100*base_2017
	drop import_industry base_* _merge
	rename import_industry1 imports
	sort partner_class year sic
	drop if year!=1993&year!=2000&year!=2007&year!=2014
	save "$clean_data_imports/c_comtrade_europe_`country'_1.dta", replace

}

*
*******************************************************************************************
*			Change in imports (already sum up all other imports for instrument)
*******************************************************************************************		

*Append and sum up imports
use "$clean_data_imports/c_comtrade_europe_us_1.dta", replace

*Combine sectors that are not in manufacturing in aggregate sectors
replace sic = 100 if sic <999
replace sic = 1000 if sic>=1000&sic<=1499
replace sic = 1500 if sic>=1500&sic<=1799
replace sic = 4000 if sic>=4000&sic<=4999
replace sic = 5000 if sic>=5000&sic<=5199
replace sic = 6000 if sic>=6000&sic<=6799
replace sic = 7000 if sic>=7000&sic<=8999
drop if sic>=9000 //only interested in employment from private sector afterwards (no public administration needed)

gen industry = "agriculture" if sic == 100
replace industry = "mining" if sic == 1000
replace industry = "construction" if sic == 1500
replace industry = "manufacturing" if sic >= 2000 & sic<=3999
replace industry = "utilities" if sic == 4000
replace industry = "wholesale" if sic == 5000
replace industry = "retail" if sic == 5200
replace industry = "finance" if sic == 6000
replace industry = "services" if sic == 7000

*Create different measures of imports
preserve
keep if partner_class!="Germany"
collapse (firstnm) country industry (sum) imports, by(sic year)
rename imports imports_eu7
save "$clean_data_imports/c_comtrade_eu7_us_1.dta", replace
restore

preserve
keep if partner_class=="Germany"
collapse (firstnm) country industry (sum) imports, by(sic year)
rename imports imports_ger
save "$clean_data_imports/c_comtrade_ger_us_1.dta", replace
restore

preserve
collapse (firstnm) country industry (sum) imports, by(sic year)
rename imports imports_eu8
save "$clean_data_imports/c_comtrade_eu8_us_1.dta", replace
restore

use "$clean_data_imports/c_comtrade_eu8_us_1.dta", clear
merge 1:1 sic year using "$clean_data_imports/c_comtrade_eu7_us_1.dta"
drop _merge
merge 1:1 sic year using "$clean_data_imports/c_comtrade_ger_us_1.dta"
drop _merge

replace imports_eu7 = 0 if mi(imports_eu7)
replace imports_ger = 0 if mi(imports_ger)
replace imports_eu8 = 0 if mi(imports_eu8)

order country year sic industry

save "$clean_data_imports/EU_imports_dataset", replace

*Long-run differences
use "$clean_data_imports/EU_imports_dataset", clear
save "$clean_data_imports/EU_imports_dataset_d", replace

foreach start in 1993 2000 2007 {
	foreach end in 2000 2007 2014 {
		foreach country in eu8 eu7 ger {
	use "$clean_data_imports/EU_imports_dataset", clear
	if `end' > `start' {
	keep if year == `start' | year == `end'
	egen id_year = group(year)
	xtset sic id_year
	gen d_`country'_imports_`start'_`end' = imports_`country'-L1.imports_`country' 
	*If missing value, means that no product imports before of this year for this sic (hence, increase is equal to final value)
	drop if id_year == 1
	replace d_`country'_imports_`start'_`end' = imports_`country' if mi(d_`country'_imports_`start'_`end')
	keep sic d_`country'_imports_`start'_`end' 
	save "$clean_data_imports/d_`country'_imports_`start'_`end'", replace
	merge 1:m sic using "$clean_data_imports/EU_imports_dataset_d"
	drop _merge
	save "$clean_data_imports/EU_imports_dataset_d", replace
	}
	}
	
	}
}
*If missing it means that this product in not imported in both periods (i.e. zero imports)


use "$clean_data_imports/EU_imports_dataset_d", clear
rename sic sic87dd

collapse (firstnm) d_*, by(sic87dd)

foreach var of varlist d_* {
	replace `var' = 0 if mi(`var')
}

save "$clean_data_imports/EU_imports_dataset_d", replace

use "$clean_data_imports/EU_imports_dataset_d", clear


************************
*LONG-RUN differences
************************
*from local shocks regional

foreach g in "eu7" "eu8" "ger" {
	foreach t in "1993_2000" "1993_2007" "1993_2014" "2000_2007" "2000_2014" "2007_2014" {
		local start = substr("`t'", 1, 4)
		local end = substr("`t'", 6, 4)

		* Load cleaned CZ-level data
		use "$clean_data_imports/employment_czone_industry_final", clear

		* Compute industry employment shares
		bysort czone year: egen czone_emp = total(emp)
		gen empshare = emp/czone_emp
		
		*Employment by industry over all CZ (total US)
		bysort sic87dd year: egen ind_emp = total(emp)
		
		drop emp czone_emp

		* Merge in each industry's change in import exposure
		merge m:1 sic87dd using "$clean_data_imports/EU_imports_dataset_d", assert(1 3) 
		assert sic87dd < 2000 | sic87dd > 3999 if _merge == 1
		

		* Define non-manufacturing changes in import exposure as zero
		replace d_`g'_imports_`t' = 0 if _merge == 1
		drop _merge
		
		*Change in imports (in 1'000 of 2017 USD) per worker
		replace d_`g'_imports_`t' = ((d_`g'_imports_`t'/1000)/ind_emp)
		
		* Calculate average changes in import exposure
		collapse (mean) d_czone_imp_exp_`g'_`t' = d_`g'_imports_`t' [aw = empshare], by(czone year)

		* Save the shocks
		tempfile d_czone_imp_exp_`g'_`t'
		save "`d_czone_imp_exp_`g'_`t''.dta", replace
	}
}

* Combine the various changes in CZ import exposure into a single file
use "`d_czone_imp_exp_eu7_1993_2000'.dta", clear
merge 1:1 czone using "`d_czone_imp_exp_eu7_2000_2007'.dta", assert(3) nogenerate
merge 1:1 czone using "`d_czone_imp_exp_eu7_2007_2014'.dta", assert(3) nogenerate
merge 1:1 czone using "`d_czone_imp_exp_eu7_1993_2014'.dta", assert(3) nogenerate

merge 1:1 czone using "`d_czone_imp_exp_eu8_1993_2000'.dta", assert(3) nogenerate
merge 1:1 czone using "`d_czone_imp_exp_eu8_2000_2007'.dta", assert(3) nogenerate
merge 1:1 czone using "`d_czone_imp_exp_eu8_2007_2014'.dta", assert(3) nogenerate
merge 1:1 czone using "`d_czone_imp_exp_eu8_1993_2014'.dta", assert(3) nogenerate

merge 1:1 czone using "`d_czone_imp_exp_ger_1993_2000'.dta", assert(3) nogenerate
merge 1:1 czone using "`d_czone_imp_exp_ger_2000_2007'.dta", assert(3) nogenerate
merge 1:1 czone using "`d_czone_imp_exp_ger_2007_2014'.dta", assert(3) nogenerate
merge 1:1 czone using "`d_czone_imp_exp_ger_1993_2014'.dta", assert(3) nogenerate

compress
save "$final_data_imports/czone_ExposureEuropeLong", replace



*Create stacked dataset with three (3!) periods

use "$final_data_imports/czone_ExposureEuropeLong", clear

reshape long d_czone_imp_exp_, i(czone year_emp) j(temp) string
gen years = substr(temp, 5, 10)
gen country = substr(temp, 1, 3)
drop temp
gen start = substr(years, 1, 4)
gen end = substr(years, 6, 4)
destring start end, replace

save "`imports_stacked3''.dta", replace

foreach g in "eu7" "eu8" "ger" {
	foreach t1 in "1993_2000" {
		foreach t2 in "2000_2007" {
			foreach t3 in "2007_2014" {
			use "`imports_stacked3''.dta", clear
			*keep if (years == "1993_2000" | years == "2000_2007" | years == "2007_2011") & country == "us"

			keep if (years == "`t1'" | years == "`t2'" | years == "`t3'") & country == "`g'"
			if end[1]==start[2] & end[2]==start[3] { //for example 1993-2000, 2000-2007, but not 1993-2000, 2007-2014
	
			egen group = group(czone)
			gen constant = 1
			bysort group: gen period3 = sum(constant)
			
			local period1 = substr(years[1],1,9)
			local period2 = substr(years[2],5,5)
			local period3 = substr(years[3],5,5)
			local period `period1'`period2'`period3'
			rename d_czone_imp_exp_ d3_exp_`g'_`period'

			keep czone year_emp d3_exp_`g'_`period' period3
			tempfile d_exp_`g'_`period'
			save "`d_exp_`g'_`period''.dta", replace
		}
	}
	}
	}
}
use "`d_exp_eu7_1993_2000_2007_2014'.dta", clear
merge 1:1 czone period3 using "`d_exp_eu8_1993_2000_2007_2014'.dta", assert(3) nogenerate
merge 1:1 czone period3 using "`d_exp_ger_1993_2000_2007_2014'.dta", assert(3) nogenerate

compress

gen year = 1990 if period3 == 1
replace year = 2000 if period3 == 2
replace year = 2008 if period3 == 3

rename (d3_exp_*_1993_2000_2007_2014) (d_exp_*_stacked)
keep czone year d_exp_*_stacked

save "$final_data_imports/czone_ExposureEurope.dta", replace
cap rm "`imports_stacked3''.dta"

